A princípio, foram utilizados dados disponíveis nos repositórios de Wesley Cota (https://github.com/wcota/covid19br) e Rami Krispin (https://github.com/RamiKrispin/covid19sf). Estes repositórios fornecem bases de dados sobre casos de covid no Brasil e no mundo.
Serão necessários os seguintes pacotes:
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.7
## v tidyr 1.1.4 v stringr 1.4.0
## v readr 2.0.2 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(geobr)
## Loading required namespace: sf
library(tmap)
## Registered S3 methods overwritten by 'stars':
## method from
## st_bbox.SpatRaster sf
## st_crs.SpatRaster sf
Eles serão necessários para ler, obter, tratar e juntar bases de dados tabulares e dados georreferenciados
getwd()
## [1] "C:/Users/Fellipe/Desktop/git/fellipe.mira.github.io"
# Switch to the project folder only when it exists. The path is specific to
# the original author's machine, and an unconditional setwd() stops the
# script with an error everywhere else.
project_dir <- "C:/Users/Fellipe/Desktop/git/fellipe.mira.github.io/"
if (dir.exists(project_dir)) {
  setwd(project_dir)
}
# Location of the state-level CSV in the wcota/covid19br repository.
url <- "https://raw.githubusercontent.com/wcota/covid19br/master/cases-brazil-states.csv"
# Download and parse the CSV straight from the URL into a tibble.
dados <- url %>%
  readr::read_csv()
## Rows: 18494 Columns: 26
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (3): country, state, city
## dbl (22): epi_week, newDeaths, deaths, newCases, totalCases, deathsMS, tota...
## date (1): date
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
Analisando a estrutura dos dados disponíveis no GitHub:
# Print one line per column: name, type and the first few values.
dados %>% glimpse()
## Rows: 18,494
## Columns: 26
## $ epi_week <dbl> 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10~
## $ date <date> 2020-02-25, 2020-02-25, 2020-02~
## $ country <chr> "Brazil", "Brazil", "Brazil", "B~
## $ state <chr> "SP", "TOTAL", "SP", "TOTAL", "S~
## $ city <chr> "TOTAL", "TOTAL", "TOTAL", "TOTA~
## $ newDeaths <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ deaths <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ newCases <dbl> 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0,~
## $ totalCases <dbl> 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,~
## $ deathsMS <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ totalCasesMS <dbl> 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2,~
## $ deaths_per_100k_inhabitants <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ totalCases_per_100k_inhabitants <dbl> 0.00218, 0.00047, 0.00218, 0.000~
## $ deaths_by_totalCases <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,~
## $ recovered <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ suspects <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ tests <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ tests_per_100k_inhabitants <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ vaccinated <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ vaccinated_per_100_inhabitants <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ vaccinated_second <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ vaccinated_second_per_100_inhabitants <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ vaccinated_single <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ vaccinated_single_per_100_inhabitants <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ vaccinated_third <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
## $ vaccinated_third_per_100_inhabitants <dbl> NA, NA, NA, NA, NA, NA, NA, NA, ~
Como nosso objetivo é trabalhar com os estados, devemos transformar a variável “state” em fator (factor).
# Replace the character column `state` with its factor equivalent.
dados[["state"]] <- as.factor(dados[["state"]])
Selecionaremos as colunas state e deaths, agruparemos por estado e calcularemos o total de óbitos de cada um.
# Total deaths per state (one row per value of `state`, including "TOTAL").
# `deaths` is a running cumulative count reported once per date, so adding it
# up across dates counts the same deaths over and over. The per-state total is
# therefore the highest cumulative value, not the sum of the column.
a <- dados %>%
  select(state, deaths) %>%
  group_by(state) %>%
  summarise(total_estado = max(deaths))
| state | total_estado |
|---|---|
| AC | 694774 |
| AL | 2196551 |
| AM | 5398133 |
| AP | 750333 |
| BA | 8928256 |
| CE | 9008901 |
| DF | 3635919 |
| ES | 4465196 |
| GO | 7238361 |
| MA | 3742990 |
| MG | 16083919 |
| MS | 2820579 |
| MT | 4529184 |
| PA | 6394536 |
| PB | 3283874 |
| PE | 7575340 |
| PI | 2484523 |
| PR | 11107877 |
| RJ | 22755645 |
| RN | 2654757 |
| RO | 2180167 |
| RR | 721558 |
| RS | 10795835 |
| SC | 5971961 |
| SE | 2159415 |
| SP | 48493877 |
| TO | 1232323 |
| TOTAL | 197304784 |
Até agora trabalhamos apenas com dados tabulares, mas, com o auxílio do pacote geobr, faremos o download de dados vetoriais dos estados brasileiros.
# Download the 2019 state boundaries (one polygon feature per state).
states <- geobr::read_state(year = 2019)
## Using year 2019
## Loading data for the whole country
##
|
| | 0%
|
|=== | 4%
|
|===== | 7%
|
|======== | 11%
|
|========== | 15%
|
|============= | 19%
|
|================ | 22%
|
|================== | 26%
|
|===================== | 30%
|
|======================= | 33%
|
|========================== | 37%
|
|============================= | 41%
|
|=============================== | 44%
|
|================================== | 48%
|
|==================================== | 52%
|
|======================================= | 56%
|
|========================================= | 59%
|
|============================================ | 63%
|
|=============================================== | 67%
|
|================================================= | 70%
|
|==================================================== | 74%
|
|====================================================== | 78%
|
|========================================================= | 81%
|
|============================================================ | 85%
|
|============================================================== | 89%
|
|================================================================= | 93%
|
|=================================================================== | 96%
|
|======================================================================| 100%
# Inspect the columns of the spatial table, including the geometry column.
states %>% glimpse()
## Rows: 27
## Columns: 6
## $ code_state <dbl> 11, 12, 13, 14, 15, 16, 17, 21, 22, 23, 24, 25, 26, 27, 2~
## $ abbrev_state <chr> "RO", "AC", "AM", "RR", "PA", "AP", "TO", "MA", "PI", "CE~
## $ name_state <chr> "Rondônia", "Acre", "Amazônas", "Roraima", "Pará", "Amapá~
## $ code_region <dbl> 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, ~
## $ name_region <chr> "Norte", "Norte", "Norte", "Norte", "Norte", "Norte", "No~
## $ geom <MULTIPOLYGON [°]> MULTIPOLYGON (((-65.3815 -1..., MULTIPOLYGON~
Conhecendo os dados, podemos realizar o join das duas bases de dados.
# Attach the per-state totals to the state geometries. A right join keeps
# every row of `states`, so totals with no matching state abbreviation (such
# as the "TOTAL" row) are dropped.
dados_com_geom <- a %>%
  right_join(states, by = c("state" = "abbrev_state"))
Agora iremos avaliar como nossa base de dados está estruturada
# Check the structure of the joined table.
glimpse(dados_com_geom)
## Rows: 27
## Columns: 7
## $ state <chr> "AC", "AL", "AM", "AP", "BA", "CE", "DF", "ES", "GO", "MA~
## $ total_estado <dbl> 694774, 2196551, 5398133, 750333, 8928256, 9008901, 36359~
## $ code_state <dbl> 12, 27, 13, 16, 29, 23, 53, 32, 52, 21, 31, 50, 51, 15, 2~
## $ name_state <chr> "Acre", "Alagoas", "Amazônas", "Amapá", "Bahia", "Ceará",~
## $ code_region <dbl> 1, 2, 1, 1, 2, 2, 5, 3, 5, 2, 3, 5, 5, 1, 2, 2, 2, 4, 3, ~
## $ name_region <chr> "Norte", "Nordeste", "Norte", "Norte", "Nordeste", "Norde~
## $ geom <MULTIPOLYGON [°]> MULTIPOLYGON (((-71.07772 -..., MULTIPOLYGON~
# Show the first six rows of the joined table.
dados_com_geom %>% head()
## # A tibble: 6 x 7
## state total_estado code_state name_state code_region name_region
## <chr> <dbl> <dbl> <chr> <dbl> <chr>
## 1 AC 694774 12 Acre 1 Norte
## 2 AL 2196551 27 Alagoas 2 Nordeste
## 3 AM 5398133 13 Amazônas 1 Norte
## 4 AP 750333 16 Amapá 1 Norte
## 5 BA 8928256 29 Bahia 2 Nordeste
## 6 CE 9008901 23 Ceará 2 Nordeste
## # ... with 1 more variable: geom <MULTIPOLYGON [°]>
Primeiramente, iremos transformar os dados que foram unidos para a classe sf (simple features), que é útil para transformar dados tabulares em dados “spatio-temporal” (por este motivo as funções começam com st_).
# The join returned a plain tibble; convert it to a data frame and then to an
# sf object so the geometry column is recognised by the mapping functions.
dados_com_geom <- dados_com_geom %>%
  as.data.frame() %>%
  sf::st_as_sf()
# Switch tmap to interactive viewing.
tmap_mode("view")
## tmap mode set to interactive viewing
# Choropleth of `total_estado` by state on a blue palette, with gray state
# borders drawn as a second layer over the fill.
tm_shape(dados_com_geom) +
  tm_fill("total_estado", palette = "Blues") +
  tm_shape(dados_com_geom) +
  tm_borders(col = "gray")